/* 3 observations per district-year, one per factory size class: fewer than 50 workers, 50-99 workers, and 100 or more workers */
/* Total number of district-years same as master district level file */
/* That is, it's like splitting that dataset into 3 size classes per district-year. Cells with zero employment etc. are possible. */
/* master do file for collapsing every ASI dataset to district level (asicode88) and creating a district level dataset*/
/*Note: Labor regs: In Karnataka, unlike all other states, labor regulation changed once between 1987-93.8*/
/* Note on ASI years: REFERENCE YEAR   for ASI 2001-2002 is the accounting year of the factory ending on 31 st March, 2002 (ASI Documentation).*/ 

* Session setup: start with no data in memory and switch off output paging
clear
set more off
* Memory and matrix-size settings requested for this run
set mem 400m
set matsize 6000


* Root folders referenced by the rest of the script (machine-specific absolute paths):
* do = folder of the helper do-files, rain = rainfall data, data = all other datasets
global do		"C:\Users\Siddharth\Desktop\factories\do" 
global rain		"C:\Users\Siddharth\Desktop\factories\data\rainfall"
global data		"C:\Users\Siddharth\Desktop\factories\data"



/*
global log		"C:\factories\log"
global do		"C:\factories\do"
global rain		"C:\factories\data\rainfall"
global data		"C:\factories\data"
global output 	"C:\factories\output"
*/

*varlist
* varlist holds the base names of the factory variables kept from each ASI round.
* varlist1, varlist2 and varlist3 hold the same names with suffix 1, 2 or 3,
* matching the per-size-class copies generated in the ASI sections
* (1 = small, 2 = medium, 3 = large) and summed by collapse.
global varlist  "factories closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent   workers  mandays workerswages totaloutput depreciation  electricity  fuel materials   valueadded profits"
global varlist1  "factories1 closingvalueland1 fixedcapitalop1 fixedcapitalcl1 landrental1 totalrent1  workers1  mandays1  workerswages1  totaloutput1  depreciation1   electricity1   fuel1  materials1  valueadded1  profits1"
global varlist2  "factories2 closingvalueland2 fixedcapitalop2 fixedcapitalcl2 landrental2 totalrent2  workers2  mandays2  workerswages2  totaloutput2  depreciation2   electricity2   fuel2  materials2  valueadded2  profits2"
global varlist3  "factories3 closingvalueland3 fixedcapitalop3 fixedcapitalcl3 landrental3 totalrent3  workers3  mandays3  workerswages3  totaloutput3  depreciation3   electricity3   fuel3  materials3  valueadded3  profits3"



* Rainfall shocks by asicode88 district and year.
* Output: rain_asicode88 in the data folder, one row per asicode88-year, sorted by asicode88 year.

* generate rain data from delaware raw precip files
do "$do\gen_rain.do"

* Store the district crosswalk sorted by asicode99: the old-style
* "merge asicode99 using" in the ASI 1993-94 section requires the using file
* to be sorted on the merge key.
use "$data\asi_50_55_43_code91.dta", clear
sort asicode99
/*tempfile codes
save `codes'*/
save "$data\asi_50_55_43_code91.dta", replace


* merge rain data with ASI district codes by calculating closest rainfall grid point, and gen rainfall shocks
do "$do\merge_rain.do"

* Attach the wide rainfall-shock file to the crosswalk on code91 and keep matched districts only
use "$data\asi_50_55_43_code91.dta", clear
sort code91
merge code91 using "$rain\district_rain_wide_code91.dta"
keep if _m == 3
drop _m  bsr96_district bsr00_district asi_district_name asi_state_ut asicode99 distt_id50 distt_id43 distt_id55 code91  longitude latitude glon glat
* Average the shocks over all crosswalk rows that share an asicode88, then go long in year
collapse (mean) shock30* shock50* shockdev* shockperc* shockctsdev* shockpctile* shocknorm* shockhigh* shocklow*, by(asicode88)
reshape long shock30 shock50 shockdev shockperc shockctsdev  shockpctile shocknorm shockhigh shocklow, i(asicode88) j(year)
* some non-integer shockpctile values: recode to the sign of the value (-1, 0 or 1).
* Stata treats missing as larger than any number, so the positive branch must
* exclude missing explicitly; otherwise a missing shock is recoded to 1.
replace shockpctile = -1 if shockpctile < 0
replace shockpctile = 1 if shockpctile > 0 & !missing(shockpctile)
drop if asicode88 == .
sort asicode88 year
save "$data\rain_asicode88",replace


* ASI 1987-88
* clean factory data
do "$do\process88_sid.do"
keep asicode88 price88 multiplier $varlist

* size-class indicators from the worker count: under 50, 50 to 99, 100 and over
* NOTE(review): a missing worker count compares as larger than any number in Stata,
* so such a factory is flagged large - confirm this is intended
generate small = (workers < 50)
generate medium = (workers >= 50 & workers < 100)
generate large = (workers >= 100)

* missing price index: use the sample mean of price88
egen meanprice88 = mean(price88)
replace price88 = meanprice88 if price88 == .
drop meanprice88

* deflating: every variable in varlist except factories, workers and mandays
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
    replace `v' = `v' / price88
}

* multiplying every kept variable by the multiplier
foreach v of global varlist {
    replace `v' = `v' * multiplier
}

* small, medium and large factories: one copy of each variable per size class
* (suffix 1 = small, 2 = medium, 3 = large), zero outside the class
foreach v of global varlist {
    generate `v'1 = `v' * small
    generate `v'2 = `v' * medium
    generate `v'3 = `v' * large
}

* district totals by size class, then one row per district and size class
collapse (sum) $varlist1 $varlist2 $varlist3, by(asicode88)
reshape long $varlist, i(asicode88) j(size)
generate year = 1987
save "$data\dist88.dta", replace

* ASI 1990-91
* Runs the cleaning script for this round, then builds district totals by size class.
* Output: dist91.dta in the data folder, one row per asicode88 and size class, with year set to 1990.
do "$do\process91_sid.do"
keep asicode88 price91  multiplier $varlist
* Size classes: under 50, 50 to 99, 100 and over.
* The large class uses >= 100, as in the 1987-88 and 1993-94 sections; with a
* strict > 100 a factory with exactly 100 workers belongs to no class and is
* lost from the district totals.
ge small = workers <50
ge medium = (workers >= 50&workers<100)
ge large = (workers>=100)

*missing price index
egen x = mean(price91)
replace price91 = x if price91 == .
drop x
* deflating
local l "closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation  electricity  fuel materials valueadded profits"
foreach v of local l{
replace `v' = `v'/price91
}
*multiplying
foreach v of global varlist{
replace `v' = `v'* multiplier
}
* small,medium and large factories
foreach v of global varlist{
ge `v'1 = `v'*small
ge `v'2 = `v'*medium
ge `v'3 = `v'*large
}

* district totals by size class, then one row per district and size class
collapse (sum) $varlist1 $varlist2 $varlist3, by (asicode88)
reshape long $varlist, i(asicode88) j(size)
ge year = 1990
save "$data\dist91.dta",replace

* ASI 1993-94
* This round is keyed by asicode99, so totals are first formed by asicode99 and
* then mapped to asicode88 through the district crosswalk.
do "$do\process94 - ach.do"
keep asicode99 price94 multiplier $varlist

* size-class indicators from the worker count: under 50, 50 to 99, 100 and over
generate small = (workers < 50)
generate medium = (workers >= 50 & workers < 100)
generate large = (workers >= 100)

* missing price index: use the sample mean of price94
egen meanprice94 = mean(price94)
replace price94 = meanprice94 if price94 == .
drop meanprice94

* deflating: every variable in varlist except factories, workers and mandays
foreach v in closingvalueland fixedcapitalop fixedcapitalcl landrental totalrent workerswages totaloutput depreciation electricity fuel materials valueadded profits {
    replace `v' = `v' / price94
}

* multiplying every kept variable by the multiplier
foreach v of global varlist {
    replace `v' = `v' * multiplier
}

* small, medium and large factories (suffix 1, 2, 3)
foreach v of global varlist {
    generate `v'1 = `v' * small
    generate `v'2 = `v' * medium
    generate `v'3 = `v' * large
}

* totals by asicode99, then attach the crosswalk to obtain asicode88
collapse (sum) $varlist1 $varlist2 $varlist3, by(asicode99)
sort asicode99
* NOTE(review): if the crosswalk holds several rows for one asicode99, the merged
* totals are repeated and the second collapse sums them more than once - confirm
merge asicode99 using "$data\asi_50_55_43_code91.dta"
keep if _m == 3
drop _m

* totals by asicode88, then one row per district and size class
collapse (sum) $varlist1 $varlist2 $varlist3, by(asicode88)
reshape long $varlist, i(asicode88) j(size)
generate year = 1993
save "$data\dist94.dta", replace


*matching different version of statecodes
* Each state code is the district code divided by 100 and truncated.
* Output: scodes in the data folder, the distinct statecode88-statecode pairs, sorted by statecode.
use "$data\asi_50_55_43_code91.dta", clear
generate statecode88 = int(asicode88 / 100)
generate statecode = int(asicode99 / 100)
keep statecode88 statecode
duplicates drop
keep if statecode88 != .
sort statecode
save "$data\scodes", replace



*  labor regulation data
* Loads the state-year labour regulation file and stores it, sorted by
* statecode88 year, in a temporary file for the old-style merge further down.
* Both file references are quoted, like the other paths in this script, so a
* data or temp folder whose path contains spaces still works.
use "$data\labregrainfall.dta", clear
*sort statecode
*merge statecode using `scodes'
*keep if _m == 3
*drop _m
sort statecode88 year
tempfile lab_temp
save "`lab_temp'"




* append factory data and merge with rain data
* Output: largeasi in the data folder, the stacked district-size-year panel
* restricted to rows matched in both the rainfall and labour regulation merges.

use "$data\dist88.dta", clear
append using "$data\dist91.dta"
append using "$data\dist94.dta"

* dummies for large and medium size classes (size 1, small, is the omitted class)
ge large = size == 3
ge medium = (size==2)


* rainfall shocks by district-year; the using file was saved sorted by asicode88 year
sort asicode88 year
merge asicode88 year using "$data\rain_asicode88"

tab _merge
keep if _merge ==3
drop _merge

ge statecode88 =  int(asicode88/100)

* labour regulation measures by state-year; the tempfile name is quoted so a
* temp folder path containing spaces does not break the command
sort statecode88 year
merge statecode88 year using "`lab_temp'", keep(statecode88 year nstrict  prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow)


tab _merge
keep if _merge==3
drop _merge	
save "$data\largeasi", replace

* merging agricultural yields
* yield is constant (1985) price-weighted index of district outputs of rice, wheat, maize, jowar, bajra and barley, divided by their cultivated area.

* Agricultural yields data 
* agr_yield.do is run for the dataset it leaves in memory, which is saved right after
do "$do\agr_yield.do"
save "$data\yield879093_asicode88", replace

* Attach the yields to the factory panel by district-year.
* NOTE(review): the old-style merge needs the yield file sorted by asicode88 year;
* presumably agr_yield.do leaves it sorted that way - confirm
use "$data\largeasi"
sort asicode88 year
merge asicode88 year using "$data\yield879093_asicode88"
* the merge indicator is dropped without filtering, so unmatched rows from either file remain
drop _m
sort asicode88
save "$data\largeasi2", replace 

* Agrarian in 1987 from NSS Unemployment data.
* Builds three district-level percentage shares and attaches them to the
* factory panel by asicode88. Output: largeasi3 in the data folder.

use "$data\nss435055_asicode88.dta", clear
keep n1_r_88 n4_r_88 n11_r_94 asicode88

* NOTE(review): the numerators are the 88 variables while the denominator is
* n11_r_94 - confirm the 94 denominator is intended
generate landed88 = (n1_r_88 / n11_r_94) * 100
generate landless88 = (n4_r_88 / n11_r_94) * 100
generate agrarian88 = ((n1_r_88 + n4_r_88) / n11_r_94) * 100

drop n1_r_88 n4_r_88 n11_r_94

label variable landed88 " % of working age who were self-employed in farm HH enterprise in 1988 "
label variable landless88 "% of working age who were wage/casual workers in agr sector in 1988 "
label variable agrarian88 "% of working age who were employed in agr in 1988"

* the shares are the master data here and the factory panel is the using file;
* the merge indicator is dropped without filtering, so unmatched rows remain
sort asicode88
merge asicode88 using "$data\largeasi2"
drop _m

sort asicode88
save "$data\largeasi3", replace


*merging initial k/o ratio and percent employment in food industries
* asi87_ratios.do is run for the dataset it leaves in memory, which becomes the
* master of the merge.
* NOTE(review): the old-style merge needs that dataset sorted by asicode88 - confirm
do "$do\asi87_ratios.do"
merge asicode88 using "$data\largeasi3"
drop _m

* filling in missing with state average: the mean is taken within agrstate
foreach v in landed88 landless88 agrarian88 food88 kqratio88 {
    tempvar statemean
    egen `statemean' = mean(`v'), by(agrstate)
    replace `v' = `statemean' if `v' == .
    drop `statemean'
}

* logs: each new variable is the source name prefixed with l
foreach v in yield factories workers totaloutput valueadded {
    generate l`v' = log(`v')
}

* one dummy per year: ydum1, ydum2, ...
tabulate year, generate(ydum)

* proe is proemployer state, prow is proworker state
*ge proe = nstrict < 0
*ge prow = nstrict > 0
*replace prow = . if nstrict == .


* state-year identifier and one dummy per state-year
* NOTE(review): statecode is not in the keep list of the labour regulation merge;
* unless a later merge supplies it, Stata resolves the name as an abbreviation
* of statecode88 - confirm which variable is meant
generate syr = year*100 + statecode
tabulate syr, generate(sydum)

* survey round: 2 for 1990, 3 for 1993, 1 for everything else
generate round = cond(year == 1993, 3, cond(year == 1990, 2, 1))

* district-round identifier
generate distyr = asicode88*10 + round

* interactions
* Three variable groups are crossed below:
*   shocks = rainfall shock measures
*   regs   = labour regulation measures
*   chars  = 1988 district characteristics
* New names join the component names with underscores, with large or medium first.
local shocks "shockctsdev shocknorm shockpctile shockdev shockhigh shocklow"
local regs "nstrict prow proe APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow"
local chars "landed88 landless88 agrarian88 kqratio88 food88"

* regulation by shock
foreach s of local shocks {
    foreach r of local regs {
        generate `r'_`s' = `r' * `s'
    }
}

* size dummy by regulation measure and by district characteristic
foreach x in proe prow nstrict APcode APDS Bhatt EPLprow DSproe DSprow Bhattproe Bhattprow `chars' {
    generate large_`x' = large * `x'
    generate medium_`x' = medium * `x'
}

* size dummy by shock, and size dummy by regulation by shock
foreach s of local shocks {
    generate large_`s' = large * `s'
    generate medium_`s' = medium * `s'
    foreach r of local regs {
        generate large_`r'_`s' = large * `r'_`s'
        generate medium_`r'_`s' = medium * `r'_`s'
    }
}

* district characteristic by shock, and regulation by characteristic by shock
foreach s of local shocks {
    foreach c of local chars {
        generate `c'_`s' = `c' * `s'
        foreach r of local regs {
            generate `r'_`c'_`s' = `r' * `c' * `s'
        }
    }
}

* size dummy by district characteristic by shock
foreach s of local shocks {
    foreach c of local chars {
        generate large_`c'_`s' = large * `c'_`s'
        generate medium_`c'_`s' = medium * `c'_`s'
    }
}

* regulation by district characteristic
foreach c of local chars {
    foreach r of local regs {
        generate `r'_`c' = `r' * `c'
    }
}

/* keeping only balanced panel districts
sort asicode88
merge asicode88 using "$data\balanced_code.dta"
drop _m
keep if balanced == 1
*/

* large and medium dummies interacted with the three year dummies ydum1 to ydum3
forvalues k = 1/3 {
    generate large_ydum`k' = large * ydum`k'
    generate medium_ydum`k' = medium * ydum`k'
}

* final dataset, written in the older file format via saveold
saveold "$data\district889194_small_medium_large_factories", replace







